{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import csv\n",
    "import os\n",
    "import re\n",
    "\n",
    "import heartpy as hp\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import scipy.signal as ssignal\n",
    "import wfdb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Map every age from 15 to 100 to a five-year bucket: 15-19 -> 1, 20-24 -> 2, ...\n",
    "# The top bucket (17) is capped so that it also absorbs age 100.\n",
    "age_group = {\n",
    "    age: min((age - 15) // 5 + 1, 17)\n",
    "    for age in range(15, 101)\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0      1\n",
       "1      1\n",
       "2      1\n",
       "3      1\n",
       "4      1\n",
       "5      1\n",
       "6      1\n",
       "7      1\n",
       "8      1\n",
       "9      1\n",
       "10     2\n",
       "11     2\n",
       "12     2\n",
       "13     2\n",
       "14     2\n",
       "15     2\n",
       "16     2\n",
       "17     2\n",
       "18     2\n",
       "19     2\n",
       "20     2\n",
       "21     2\n",
       "22     2\n",
       "23     2\n",
       "24     2\n",
       "25     2\n",
       "26     2\n",
       "27     2\n",
       "28     2\n",
       "29     2\n",
       "30     2\n",
       "31     2\n",
       "32     2\n",
       "33     3\n",
       "34     4\n",
       "35     4\n",
       "36     4\n",
       "37     6\n",
       "38     6\n",
       "39     6\n",
       "40     7\n",
       "41     7\n",
       "42     7\n",
       "43     8\n",
       "44     8\n",
       "45     8\n",
       "46     8\n",
       "47     9\n",
       "48     9\n",
       "49    14\n",
       "50    15\n",
       "Name: age, dtype: int64"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the subject metadata, keep only the rows whose `observations` cell is\n",
    "# empty, and swap each age for its bucket number from `age_group`.\n",
    "subjects = (\n",
    "    pd.read_excel(\n",
    "        r\"D:\\Proga\\AML\\datasets\\lucilacapurro\\PPG_Signal_Dataset\\subjects_metadata.xlsx\",\n",
    "        dtype={\"ID\": str},\n",
    "    )\n",
    "    .loc[lambda frame: frame[\"observations\"].isna()]\n",
    "    .reset_index()\n",
    "    .loc[:, [\"subjectcode\", \"age\"]]\n",
    "    .assign(age=lambda frame: frame[\"age\"].map(age_group))\n",
    ")\n",
    "subjects[\"age\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'record_name': 's1_sit', 'n_sig': 18, 'fs': 500, 'counter_freq': None, 'base_counter': None, 'sig_len': 254026, 'base_time': None, 'base_date': None, 'comments': ['<filename>: s1_sit <activity>: sit <gender>: female <height>: 160 <weight>: 50 <age>: 25 <bp_sys_start>: 87 <bp_sys_end>: 87 <bp_dia_start>: 59 <bp_dia_end>: 57 <hr_1_start>: 74 <hr_2_start>: 69 <hr_1_end>: 74 <hr_2_end>: 70 <spo2_start>: 98 <spo2_end>: 98'], 'sig_name': ['ecg', 'pleth_1', 'pleth_2', 'pleth_3', 'pleth_4', 'pleth_5', 'pleth_6', 'lc_1', 'lc_2', 'temp_1', 'temp_2', 'temp_3', 'a_x', 'a_y', 'a_z', 'g_x', 'g_y', 'g_z'], 'p_signal': array([[ 3.07405002e+04,  7.16949830e+04,  7.37368418e+04, ...,\n",
      "         7.73396744e-03, -4.82016791e-04,  4.57538728e-03],\n",
      "       [ 3.07200475e+04,  7.16798682e+04,  7.37258545e+04, ...,\n",
      "         6.81947805e-03,  1.24659515e-03,  4.31670814e-03],\n",
      "       [ 3.07098211e+04,  7.16798682e+04,  7.37258545e+04, ...,\n",
      "         6.14014307e-03,  1.91144590e-03,  3.37899626e-03],\n",
      "       ...,\n",
      "       [ 3.14052150e+04,  7.14440103e+04,  7.35031929e+04, ...,\n",
      "        -4.07600987e-03, -1.16348881e-03, -2.32811225e-03],\n",
      "       [ 3.13234039e+04,  7.14240787e+04,  7.34942237e+04, ...,\n",
      "        -3.68408584e-03, -7.64578358e-04, -2.47361927e-03],\n",
      "       [ 3.12415929e+04,  7.14280650e+04,  7.35011749e+04, ...,\n",
      "        -3.00475086e-03, -8.97548507e-04, -2.19877268e-03]]), 'd_signal': None, 'e_p_signal': None, 'e_d_signal': None, 'file_name': ['s1_sit.dat', 's1_sit.dat', 's1_sit.dat', 's1_sit.dat', 's1_sit.dat', 's1_sit.dat', 's1_sit.dat', 's1_sit.dat', 's1_sit.dat', 's1_sit.dat', 's1_sit.dat', 's1_sit.dat', 's1_sit.dat', 's1_sit.dat', 's1_sit.dat', 's1_sit.dat', 's1_sit.dat', 's1_sit.dat'], 'fmt': ['212', '212', '212', '212', '212', '212', '212', '212', '212', '212', '212', '212', '212', '212', '212', '212', '212', '212'], 'samps_per_frame': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'skew': [None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None], 'byte_offset': [None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None], 'adc_gain': [0.09778630723913237, 6.020574686815348, 4.459683816088181, 14.517698343504795, 2.856934028974484, 1.0317436093094239, 3.1687298626420213, 0.015003494070973998, 0.016111825759922257, 5933.313397129187, 4705.729310822031, 5648.217625744356, 19940.206423535743, 26716.762042335686, 24167.766212929266, 38272.72383973415, 60163.879223257376, 61852.68761210438], 'baseline': [-4238, -430736, -328148, -85306, -242438, -96694, -20733, -108087, -102285, -200457, -172065, -192271, -84631, -36670, 205039, -20, 97, 91], 'units': ['mV', 'NU', 'NU', 'NU', 'NU', 'NU', 'NU', 'NU', 'NU', 'C', 'C', 'C', 'g', 'g', 'g', 'deg/s', 'deg/s', 'deg/s'], 'adc_res': [12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12], 'adc_zero': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'init_value': [-1232, 909, 695, -1930, -484, -979, -1216, -1567, -1154, 148, -305, -1894, 1080, -31, 802, 276, 68, 374], 'checksum': [27380, 31519, 65163, 28840, 7943, 30654, 37387, 62594, 40012, 57005, 58136, 29325, 59748, 43521, 9513, 63081, 44363, 34207], 'block_size': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}\n"
     ]
    }
   ],
   "source": [
    "# Load a single record to inspect its header fields and signal layout.\n",
    "# Raw string: the backslashes of the Windows path stay literal and no escape\n",
    "# sequences are interpreted.\n",
    "record = wfdb.rdrecord(r'D:\\Proga\\AML\\datasets\\physionet.org\\files\\pulse-transit-time-ppg\\1.1.0\\s1_sit')\n",
    "print(record.__dict__)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def cheby(ppg, fs, order=4, rs=20, btype='low', cutoff_hz=10.0, axis=-1):\n",
    "    \"\"\"Apply a zero-phase Chebyshev type II filter to a signal.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    ppg : array_like\n",
    "        Signal to filter.\n",
    "    fs : float\n",
    "        Sampling frequency in Hz.\n",
    "    order : int, optional\n",
    "        Filter order, passed to ``scipy.signal.cheby2`` as ``N``.\n",
    "    rs : float, optional\n",
    "        Minimum stop-band attenuation in dB.\n",
    "    btype : str, optional\n",
    "        Filter type understood by ``scipy.signal.cheby2`` (default 'low').\n",
    "    cutoff_hz : float or sequence of float, optional\n",
    "        Critical frequency (or band edges) in Hz; default 10 Hz.\n",
    "    axis : int, optional\n",
    "        Axis of ``ppg`` along which to filter; default -1.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    numpy.ndarray\n",
    "        Filtered signal with the same shape as ``ppg``.\n",
    "    \"\"\"\n",
    "    nyquist = 0.5 * fs\n",
    "    # cheby2 expects critical frequencies normalised to the Nyquist frequency.\n",
    "    normalised_cutoff = np.asarray(cutoff_hz, dtype=float) / nyquist\n",
    "    b, a = ssignal.cheby2(N=order, rs=rs, Wn=normalised_cutoff, btype=btype)\n",
    "    # filtfilt runs the filter forwards and backwards, cancelling the phase shift.\n",
    "    return ssignal.filtfilt(b, a, ppg, axis=axis)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(1, '1', None, None, None, 'BadSignalWarning')\n",
      "Process end: 1\n",
      "(2, '2', None, None, None, 'BadSignalWarning')\n",
      "Process end: 2\n",
      "(3, '3', None, None, None, 'BadSignalWarning')\n",
      "Process end: 3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "c:\\Users\\toha2\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\numpy\\core\\fromnumeric.py:3464: RuntimeWarning: Mean of empty slice.\n",
      "  return _methods._mean(a, axis=axis, dtype=dtype,\n",
      "c:\\Users\\toha2\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\numpy\\core\\_methods.py:269: RuntimeWarning: Degrees of freedom <= 0 for slice\n",
      "  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,\n",
      "c:\\Users\\toha2\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\numpy\\ma\\core.py:5317: RuntimeWarning: Mean of empty slice.\n",
      "  result = super().mean(axis=axis, dtype=dtype, **kwargs)[()]\n",
      "c:\\Users\\toha2\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\numpy\\core\\fromnumeric.py:3747: RuntimeWarning: Degrees of freedom <= 0 for slice\n",
      "  return _methods._var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,\n"
     ]
    },
    {
     "ename": "KeyError",
     "evalue": "'ID'",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
      "File \u001b[1;32mc:\\Users\\toha2\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\pandas\\core\\indexes\\base.py:3805\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m   3804\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m-> 3805\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcasted_key\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m   3806\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n",
      "File \u001b[1;32mindex.pyx:167\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[1;34m()\u001b[0m\n",
      "File \u001b[1;32mindex.pyx:196\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[1;34m()\u001b[0m\n",
      "File \u001b[1;32mpandas\\\\_libs\\\\hashtable_class_helper.pxi:7081\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[1;34m()\u001b[0m\n",
      "File \u001b[1;32mpandas\\\\_libs\\\\hashtable_class_helper.pxi:7089\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[1;34m()\u001b[0m\n",
      "\u001b[1;31mKeyError\u001b[0m: 'ID'",
      "\nThe above exception was the direct cause of the following exception:\n",
      "\u001b[1;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
      "Cell \u001b[1;32mIn[7], line 25\u001b[0m\n\u001b[0;32m     23\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m     24\u001b[0m     wd, m \u001b[38;5;241m=\u001b[39m hp\u001b[38;5;241m.\u001b[39mprocess(signal\u001b[38;5;241m.\u001b[39mflatten(), fs)\n\u001b[1;32m---> 25\u001b[0m     idx \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mint\u001b[39m(subjects[\u001b[43msubjects\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mID\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m \u001b[38;5;241m==\u001b[39m \u001b[38;5;28mid\u001b[39m]\u001b[38;5;241m.\u001b[39mindex\u001b[38;5;241m.\u001b[39mto_list()[\u001b[38;5;241m0\u001b[39m])\n\u001b[0;32m     26\u001b[0m     age_group \u001b[38;5;241m=\u001b[39m age_group[idx]\n\u001b[0;32m     28\u001b[0m     result_metadata\u001b[38;5;241m.\u001b[39mappend({\n\u001b[0;32m     29\u001b[0m         \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mid\u001b[39m\u001b[38;5;124m'\u001b[39m: new_id,\n\u001b[0;32m     30\u001b[0m         \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mage_group\u001b[39m\u001b[38;5;124m'\u001b[39m: age_group,\n\u001b[0;32m     31\u001b[0m         \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mm\n\u001b[0;32m     32\u001b[0m     })\n",
      "File \u001b[1;32mc:\\Users\\toha2\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\pandas\\core\\frame.py:4102\u001b[0m, in \u001b[0;36mDataFrame.__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m   4100\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcolumns\u001b[38;5;241m.\u001b[39mnlevels \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[0;32m   4101\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_getitem_multilevel(key)\n\u001b[1;32m-> 4102\u001b[0m indexer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m   4103\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_integer(indexer):\n\u001b[0;32m   4104\u001b[0m     indexer \u001b[38;5;241m=\u001b[39m [indexer]\n",
      "File \u001b[1;32mc:\\Users\\toha2\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\pandas\\core\\indexes\\base.py:3812\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m   3807\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(casted_key, \u001b[38;5;28mslice\u001b[39m) \u001b[38;5;129;01mor\u001b[39;00m (\n\u001b[0;32m   3808\u001b[0m         \u001b[38;5;28misinstance\u001b[39m(casted_key, abc\u001b[38;5;241m.\u001b[39mIterable)\n\u001b[0;32m   3809\u001b[0m         \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28many\u001b[39m(\u001b[38;5;28misinstance\u001b[39m(x, \u001b[38;5;28mslice\u001b[39m) \u001b[38;5;28;01mfor\u001b[39;00m x \u001b[38;5;129;01min\u001b[39;00m casted_key)\n\u001b[0;32m   3810\u001b[0m     ):\n\u001b[0;32m   3811\u001b[0m         \u001b[38;5;28;01mraise\u001b[39;00m InvalidIndexError(key)\n\u001b[1;32m-> 3812\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01merr\u001b[39;00m\n\u001b[0;32m   3813\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n\u001b[0;32m   3814\u001b[0m     \u001b[38;5;66;03m# If we have a listlike key, _check_indexing_error will raise\u001b[39;00m\n\u001b[0;32m   3815\u001b[0m     \u001b[38;5;66;03m#  InvalidIndexError. Otherwise we fall through and re-raise\u001b[39;00m\n\u001b[0;32m   3816\u001b[0m     \u001b[38;5;66;03m#  the TypeError.\u001b[39;00m\n\u001b[0;32m   3817\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_indexing_error(key)\n",
      "\u001b[1;31mKeyError\u001b[0m: 'ID'"
     ]
    }
   ],
   "source": [
    "# Extract HeartPy measures from the first five minutes of every subject's\n",
    "# \"sit\" recording and tag each result row with the subject's age group.\n",
    "PTT_PPG_DIR = r'D:\\Proga\\AML\\datasets\\physionet.org\\files\\pulse-transit-time-ppg\\1.1.0'\n",
    "# NOTE(review): each record stores several channels, while HeartPy expects a\n",
    "# single 1-D trace. 'pleth_1' is an assumption - confirm which PPG channel is intended.\n",
    "PPG_CHANNEL = 'pleth_1'\n",
    "WINDOW_SECONDS = 60 * 5\n",
    "\n",
    "\n",
    "def _age_group_from_header(comments):\n",
    "    \"\"\"Return the age bucket for the '<age>: N' header field, or None if absent.\"\"\"\n",
    "    match = re.search(r'<age>:\\s*(\\d+)', ' '.join(comments or []))\n",
    "    if match is None:\n",
    "        return None\n",
    "    # .get() yields None for ages outside the range covered by `age_group`.\n",
    "    return age_group.get(int(match.group(1)))\n",
    "\n",
    "\n",
    "def _error_entry(new_id, subject_id, error):\n",
    "    \"\"\"Log a failed subject and return its placeholder metadata row.\"\"\"\n",
    "    print((new_id, subject_id, None, None, None, error))\n",
    "    return {'id': new_id, 'age_group': None, 'error': error}\n",
    "\n",
    "\n",
    "def _process_subject(new_id, subject_id):\n",
    "    \"\"\"Build the metadata row for one subject; failures become an 'error' row.\"\"\"\n",
    "    try:\n",
    "        record = wfdb.rdrecord(os.path.join(PTT_PPG_DIR, f's{subject_id}_sit'))\n",
    "    except FileNotFoundError:\n",
    "        return _error_entry(new_id, subject_id, \"FileNotFound\")\n",
    "\n",
    "    if PPG_CHANNEL not in record.sig_name:\n",
    "        return _error_entry(new_id, subject_id, \"ChannelNotFound\")\n",
    "\n",
    "    fs = record.fs\n",
    "    channel_index = record.sig_name.index(PPG_CHANNEL)\n",
    "    # Keep one channel so the filter runs along time instead of across channels,\n",
    "    # and so HeartPy receives a genuine 1-D trace rather than a flattened matrix.\n",
    "    ppg = record.p_signal[:int(WINDOW_SECONDS * fs), channel_index].astype(np.float32)\n",
    "    ppg = cheby(ppg, fs, order=4)\n",
    "\n",
    "    try:\n",
    "        _, measures = hp.process(ppg, fs)\n",
    "    except hp.exceptions.BadSignalWarning:\n",
    "        return _error_entry(new_id, subject_id, \"BadSignalWarning\")\n",
    "\n",
    "    # The age comes from the record's own header comments instead of `subjects`:\n",
    "    # that table is loaded from a different dataset directory and keeps no 'ID'\n",
    "    # column, so looking subjects up by 'ID' raises KeyError.\n",
    "    # NOTE(review): presumably every header carries '<age>:'; rows without it get None.\n",
    "    subject_age_group = _age_group_from_header(record.comments)\n",
    "    print((new_id, subject_id, subject_age_group, measures))\n",
    "    return {'id': new_id, 'age_group': subject_age_group, **measures}\n",
    "\n",
    "\n",
    "result = []\n",
    "result_metadata = []\n",
    "\n",
    "ids = range(1, 23)\n",
    "for new_id, subject_number in enumerate(ids, start=1):\n",
    "    subject_id = str(subject_number)\n",
    "    result_metadata.append(_process_subject(new_id, subject_id))\n",
    "    print(f\"Process end: {subject_id}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
